<?php
    // Script entry point: emit the two-line page banner, then run the crawl,
    // which prints its own results underneath.
    echo " Welcome to SK..!!!!", "</br>";
    echo " List of urls after web crawling", "</br>";
    webcrawl();
    
    /**
     * Crawl a list of seed pages and pass the URL-like links found on each one
     * to valid_url().
     *
     * For every seed page the function downloads the HTML, collects the href
     * attribute of each <a> element under /html/body, keeps the hrefs that
     * contain an http(s)/ftp URL or a "www." host, prints a separator line and
     * then calls valid_url() with the surviving links.
     *
     * @param array|null $seed_urls Pages to crawl. null (the default) uses the
     *                              built-in list of training sites.
     * @return void
     */
    function webcrawl($seed_urls = null) {

        if ($seed_urls === null) {
            $seed_urls = array(
                "http://www.webucator.com/",
                "https://www.coursera.org/",
                "http://marakana.com/",
                "http://www.learningtree.com/",
                "http://en.wikipedia.org/wiki/List_of_programming_languages",
            );
        }

        foreach ($seed_urls as $url) {

            $nested_url = array();
            $html = file_get_contents($url);

            // A failed (false) or empty download must never reach loadHTML():
            // on PHP 8 an empty source throws a ValueError, which would abort
            // the crawl of all remaining seeds. Such a seed simply yields no links.
            if ($html !== false && $html !== '') {

                // Real-world HTML is rarely well-formed. Collect libxml's
                // complaints internally (and discard them) instead of silencing
                // the whole call with "@", which would also hide genuine errors.
                $previous_setting = libxml_use_internal_errors(true);
                $dom_object = new DOMDocument();
                $dom_object->loadHTML($html);
                libxml_clear_errors();
                libxml_use_internal_errors($previous_setting);

                $xpath_node = new DOMXPath($dom_object);
                foreach ($xpath_node->evaluate("/html/body//a") as $href) {
                    // Anchors without an href yield '' and are dropped by the filter below.
                    $nested_url[] = $href->getAttribute('href');
                }
            }

            // Keep only hrefs that contain something URL-shaped; relative links,
            // fragments and empty hrefs are discarded. preg_grep preserves keys.
            $good_urls = preg_grep( "/\b(?:(?:https?|ftp):\/\/|www\.)[-a-z0-9+&@#\/%?=~_|!:,.;]*[-a-z0-9+&@#\/%=~_|]/i", $nested_url);

            // One separator per seed page, printed even when no links were found.
            echo "--------------------------------"."</br>";

            valid_url($good_urls);
        }
    }
    
    
    /**
     * Fetch each candidate URL and print the ones whose page looks like a Java
     * training course.
     *
     * A page qualifies when its lower-cased content contains MORE THAN ONE
     * occurrence of each of: "java " (with the trailing space), "training" and
     * "course". Each qualifying URL is echoed, HTML-escaped, followed by "</br>".
     * URLs that cannot be fetched are skipped silently.
     *
     * @param array $good_urls Candidate URLs; array keys are ignored.
     * @return void
     */
    function valid_url($good_urls) {

        // preg_grep() in the caller can return false on a regex error (PHP < 8).
        if (!is_array($good_urls)) {
            return;
        }

        foreach ($good_urls as $validity) {

            // Links written as "www.example.com" carry no scheme; without one
            // file_get_contents() would look for a local file of that name.
            $target = (strncasecmp($validity, 'www.', 4) === 0)
                ? 'http://' . $validity
                : $validity;

            // Best-effort fetch: dead links are expected while crawling, so the
            // warning is suppressed and the failure is handled explicitly.
            $page = @file_get_contents($target);
            if ($page === false) {
                continue;
            }
            $page = strtolower($page);

            // POLICIES: keyword frequencies used to classify the page.
            // 'java ' keeps its trailing space so that "javascript" is not counted.
            $coursename_count = substr_count($page, 'java ');
            $training_count = substr_count($page, 'training');
            $course_count = substr_count($page, 'course');

            if ($coursename_count > 1 && $training_count > 1 && $course_count > 1) {
                // The URL comes from a third-party page: escape it before
                // writing it into the HTML output.
                echo htmlspecialchars($validity, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</br>";
            }
        }
    }

    
?>
        

